package main

import (
	"bufio"
	"bytes"
	"database/sql"
	_ "embed"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"os"
	"regexp"

	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"

	"github.com/gogs/chardet"

	_ "github.com/mattn/go-sqlite3"
)

// defaultRegexp holds the contents of match.json, embedded at build time.
// It is appended to the -r flag's help text as a reference configuration.
//
//go:embed match.json
var defaultRegexp string

// detectFileCharset sniffs the text encoding of the file fn from a sample of
// at most its first 4 KiB.
//
// It walks the detector's candidates in the order they are returned and
// reports the first one it recognises: "GB18030" for GB18030, GBK or GB2312,
// and "UTF-8" for UTF-8. It returns "" with a nil error when the file is
// empty or no candidate is recognised. Errors from opening or reading the
// file, and from the detector itself, are returned unchanged.
func detectFileCharset(fn string) (string, error) {
	fp, err := os.Open(fn)
	if err != nil {
		return "", err
	}
	defer fp.Close()

	sample := make([]byte, 4*1024)
	// ReadFull keeps reading until the buffer is full or the file ends; a
	// short or empty file is not an error for our purposes.
	n, err := io.ReadFull(fp, sample)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", err
	}
	if n == 0 {
		return "", nil
	}

	// Only hand the bytes actually read to the detector; the unused tail of
	// the buffer is zero-filled and would skew detection for short files.
	candidates, err := chardet.NewTextDetector().DetectAll(sample[:n])
	if err != nil {
		return "", err
	}

	for _, c := range candidates {
		switch c.Charset {
		case "GB18030", "GBK", "GB2312":
			return "GB18030", nil
		case "UTF-8":
			return "UTF-8", nil
		}
	}

	return "", nil
}

// main imports question/answer sections from a plain-text file into the
// database returned by openDB.
//
// The first positional argument is the input file; -r names the JSON file
// with the question and answer regular expressions. When detectFileCharset
// reports GB18030 the input is passed through the GB18030 decoder before
// parsing. A line matching the question pattern starts a new section; every
// following line matching any answer pattern is appended to it. Each
// completed section is handed to saveSection inside one transaction that is
// committed after the whole input has been read.
//
// Fatal problems (bad arguments, I/O, configuration, commit failure) panic.
func main() {
	rf := flag.String("r", "match.json", "正则表达式配置文件，指定问题和答案的正则表达式，参考内容："+defaultRegexp)
	flag.Parse()
	if flag.Arg(0) == "" {
		panic("必须指定纯文本输入文件")
	}

	fcharset, err := detectFileCharset(flag.Arg(0))
	if err != nil {
		panic(err)
	}

	fp, err := os.Open(flag.Arg(0))
	if err != nil {
		panic(err)
	}
	defer fp.Close()
	var rd1 io.Reader = fp
	if fcharset == "GB18030" {
		rd1 = transform.NewReader(fp, simplifiedchinese.GB18030.NewDecoder().Transformer)
	}
	rd := bufio.NewReader(rd1)

	db, err := openDB()
	if err != nil {
		panic(err)
	}
	defer db.Close()

	tx, err := db.Begin()
	if err != nil {
		panic(err)
	}
	// Safety net for the panics below: undo the transaction unless it was
	// committed. After a successful Commit this call is a harmless no-op.
	defer tx.Rollback()

	exps, err := getMatchRegexp(*rf)
	if err != nil {
		panic(err)
	}
	question := regexp.MustCompile(exps.Question)
	answers := make([]*regexp.Regexp, 0, len(exps.Answer))
	for _, v := range exps.Answer {
		answers = append(answers, regexp.MustCompile(v))
	}

	var section []string
	var readErr error
	for {
		// ReadBytes returns the complete line however long it is, whereas
		// ReadLine would hand back over-long lines in several fragments.
		raw, err := rd.ReadBytes('\n')
		if len(raw) > 0 {
			line := bytes.TrimSpace(raw)
			if question.Match(line) {
				// Flush the previous section and start a new one. saveSection
				// rejects sections without answers (including the empty one
				// before the first question), so its error is deliberately
				// not fatal here.
				_ = saveSection(tx, section)
				section = []string{string(line)}
			} else {
				for _, answer := range answers {
					if answer.Match(line) {
						section = append(section, string(line))
						// Stop at the first matching pattern so the line is
						// not recorded more than once.
						break
					}
				}
			}
		}
		if err != nil {
			if err != io.EOF {
				readErr = err
			}
			break
		}
	}
	// Flush the section that was still open when the input ended.
	_ = saveSection(tx, section)

	if err := tx.Commit(); err != nil {
		panic(err)
	}
	// Everything parsed before a read failure has been committed above; still
	// surface the failure instead of pretending the import was complete.
	if readErr != nil {
		panic(readErr)
	}
}

// openDB opens the SQLite database "data.db" (path relative to the working
// directory) and makes sure the answers table exists.
//
// On success the caller owns the returned handle and must Close it. On
// failure the returned *sql.DB is nil and any handle opened along the way has
// already been closed.
func openDB() (*sql.DB, error) {
	db, err := sql.Open("sqlite3", "data.db")
	if err != nil {
		return nil, fmt.Errorf("open data.db: %w", err)
	}
	if _, err := db.Exec("CREATE TABLE IF NOT EXISTS answers(question text unique, answers text);"); err != nil {
		// Do not hand a half-initialised handle back to the caller.
		db.Close()
		return nil, fmt.Errorf("create answers table: %w", err)
	}
	return db, nil
}

// saveSection inserts one parsed section into the answers table using tx.
//
// section[0] is the question text; the remaining elements are its answer
// lines, stored as a JSON array in the answers column.
//
// It returns an error when the section has fewer than two elements (a
// question without answers, or nothing at all), when the answers cannot be
// marshalled, or when the INSERT statement fails.
func saveSection(tx *sql.Tx, section []string) error {
	if len(section) < 2 {
		return fmt.Errorf("too short section")
	}
	answers, err := json.Marshal(section[1:])
	if err != nil {
		return err
	}

	if _, err := tx.Exec("INSERT INTO answers(question,answers) VALUES(?,?);", section[0], string(answers)); err != nil {
		return fmt.Errorf("insert question %q: %w", section[0], err)
	}
	return nil
}

// MatchRegexp is the pattern configuration decoded from the JSON file named
// by the -r flag.
type MatchRegexp struct {
	// Question is the regular expression for a line that starts a new
	// question section.
	Question string
	// Answer lists regular expressions; a line matching any one of them is
	// recorded as an answer line of the current section.
	Answer   []string
}

// getMatchRegexp reads the JSON file name and decodes it into a MatchRegexp.
//
// It returns a nil configuration together with the error when the file
// cannot be opened or its contents cannot be decoded, so callers never see a
// partially filled struct.
func getMatchRegexp(name string) (*MatchRegexp, error) {
	fp, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	defer fp.Close()

	ret := &MatchRegexp{}
	if err := json.NewDecoder(fp).Decode(ret); err != nil {
		return nil, fmt.Errorf("parsing %s: %w", name, err)
	}
	return ret, nil
}
